{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 选用了4种回归模型，最小二乘法 、 岭回归、索套、随机森林\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn import model_selection\n",
    "from sklearn.ensemble import RandomForestRegressor as RFR # 随机森林\n",
    "from sklearn.linear_model import LinearRegression as LR # 最小二乘法\n",
    "from sklearn.linear_model import Ridge as R # 岭回归\n",
    "from sklearn.linear_model import Lasso as L # 索套"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "###  数据处理、筛选出特征值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Users\\80423\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:12: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  if sys.path[0] == '':\n",
      "D:\\Users\\80423\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:13: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  del sys.path[0]\n"
     ]
    }
   ],
   "source": [
    "my = pd.read_csv('maoyanfilms_clean.csv')\n",
    "#逐步取出nan值，保证特征列都没有nan值\n",
    "df1 = my[['films_id','films_type']].dropna()\n",
    "df1['films_type'] = df1['films_type'].apply(lambda x: x.split(',')[0])\n",
    "df2 = pd.merge(df1,my,on='films_id')\n",
    "#datas.loc[:,(datas.columns != '类型') & (datas.columns != '地区') & (datas.columns != '语言')]\n",
    "df3 = df2.loc[:,(df2.columns!='Unnamed: 0')&(df2.columns!='films_type_y')]\n",
    "df4  = df3[['films_id','films_area']].dropna()\n",
    "df4['films_area'] = df4['films_area'].apply(lambda x: x.split(',')[0])\n",
    "df5 = pd.merge(df4,df3,on='films_id')\n",
    "df6 = df5.loc[:,(df5.columns!='films_area_y')]\n",
    "df6['films_duration'] = df6['films_duration'].apply(lambda x: x[:-2])\n",
    "df6['film_first_time'] = df6['film_first_time'].apply(lambda x: str(x).split('-')[0])\n",
    "df7 = df6[['films_id','films_score','films_box_office']].dropna()\n",
    "df8=pd.merge(df7,df6,on='films_id')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>类型</th>\n",
       "      <th>时长</th>\n",
       "      <th>年份</th>\n",
       "      <th>评分</th>\n",
       "      <th>地区</th>\n",
       "      <th>票房</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>喜剧</td>\n",
       "      <td>122</td>\n",
       "      <td>2010</td>\n",
       "      <td>7.4</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>47200.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>爱情</td>\n",
       "      <td>117</td>\n",
       "      <td>2009</td>\n",
       "      <td>6.1</td>\n",
       "      <td>中国香港</td>\n",
       "      <td>11299.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>爱情</td>\n",
       "      <td>128</td>\n",
       "      <td>2012</td>\n",
       "      <td>7.8</td>\n",
       "      <td>中国台湾</td>\n",
       "      <td>13700.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>爱情</td>\n",
       "      <td>112</td>\n",
       "      <td>2012</td>\n",
       "      <td>7.1</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>2137.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>爱情</td>\n",
       "      <td>101</td>\n",
       "      <td>2007</td>\n",
       "      <td>8.5</td>\n",
       "      <td>中国香港</td>\n",
       "      <td>33.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1079</th>\n",
       "      <td>喜剧</td>\n",
       "      <td>80</td>\n",
       "      <td>2019</td>\n",
       "      <td>8.9</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>2389.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1080</th>\n",
       "      <td>喜剧</td>\n",
       "      <td>98</td>\n",
       "      <td>2019</td>\n",
       "      <td>8.5</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>944.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1081</th>\n",
       "      <td>喜剧</td>\n",
       "      <td>84</td>\n",
       "      <td>2019</td>\n",
       "      <td>8.1</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>557.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1082</th>\n",
       "      <td>剧情</td>\n",
       "      <td>114</td>\n",
       "      <td>2019</td>\n",
       "      <td>7.9</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>6271.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1083</th>\n",
       "      <td>动画</td>\n",
       "      <td>91</td>\n",
       "      <td>2019</td>\n",
       "      <td>8.0</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>1127.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1084 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        类型   时长    年份   评分    地区       票房\n",
       "0      喜剧   122  2010  7.4  中国大陆  47200.0\n",
       "1      爱情   117  2009  6.1  中国香港  11299.0\n",
       "2      爱情   128  2012  7.8  中国台湾  13700.0\n",
       "3      爱情   112  2012  7.1  中国大陆   2137.0\n",
       "4      爱情   101  2007  8.5  中国香港     33.0\n",
       "...    ...  ...   ...  ...   ...      ...\n",
       "1079   喜剧    80  2019  8.9  中国大陆   2389.0\n",
       "1080   喜剧    98  2019  8.5  中国大陆    944.0\n",
       "1081   喜剧    84  2019  8.1  中国大陆    557.0\n",
       "1082   剧情   114  2019  7.9  中国大陆   6271.0\n",
       "1083   动画    91  2019  8.0  中国大陆   1127.0\n",
       "\n",
       "[1084 rows x 6 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#特征值 构成新的DataFrame\n",
    "myd=df8[['films_type_x','films_duration','film_first_time','films_score_x','films_area_x','films_box_office_x']]\n",
    "new_col = ['类型','时长','年份','评分','地区','票房']\n",
    "myd.columns = new_col\n",
    "myd\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 取出特征 构成数据集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "#非数值型 特征值 需要处理一下\n",
    "def extract_feature(myd):\n",
    "    dftype = pd.get_dummies(myd['类型'],prefix='类型')\n",
    "    dfarea = pd.get_dummies(myd['地区'],prefix='地区')\n",
    "    datas = pd.concat([myd[['时长','年份','评分','票房']],dftype],axis=1)\n",
    "    datas = pd.concat([datas,dfarea],axis=1)\n",
    "    return datas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>时长</th>\n",
       "      <th>年份</th>\n",
       "      <th>评分</th>\n",
       "      <th>票房</th>\n",
       "      <th>类型_ 传记</th>\n",
       "      <th>类型_ 冒险</th>\n",
       "      <th>类型_ 剧情</th>\n",
       "      <th>类型_ 动作</th>\n",
       "      <th>类型_ 动画</th>\n",
       "      <th>类型_ 历史</th>\n",
       "      <th>...</th>\n",
       "      <th>地区_中国大陆</th>\n",
       "      <th>地区_中国香港</th>\n",
       "      <th>地区_俄罗斯</th>\n",
       "      <th>地区_加拿大</th>\n",
       "      <th>地区_德国</th>\n",
       "      <th>地区_法国</th>\n",
       "      <th>地区_澳大利亚</th>\n",
       "      <th>地区_美国</th>\n",
       "      <th>地区_英国</th>\n",
       "      <th>地区_韩国</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>122</td>\n",
       "      <td>2010</td>\n",
       "      <td>7.4</td>\n",
       "      <td>47200.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>117</td>\n",
       "      <td>2009</td>\n",
       "      <td>6.1</td>\n",
       "      <td>11299.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>128</td>\n",
       "      <td>2012</td>\n",
       "      <td>7.8</td>\n",
       "      <td>13700.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>112</td>\n",
       "      <td>2012</td>\n",
       "      <td>7.1</td>\n",
       "      <td>2137.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>101</td>\n",
       "      <td>2007</td>\n",
       "      <td>8.5</td>\n",
       "      <td>33.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1133</th>\n",
       "      <td>80</td>\n",
       "      <td>2019</td>\n",
       "      <td>7.8</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1134</th>\n",
       "      <td>80</td>\n",
       "      <td>2019</td>\n",
       "      <td>7.8</td>\n",
       "      <td>1200.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1135</th>\n",
       "      <td>80</td>\n",
       "      <td>2019</td>\n",
       "      <td>7.8</td>\n",
       "      <td>1200.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1136</th>\n",
       "      <td>80</td>\n",
       "      <td>2019</td>\n",
       "      <td>7.8</td>\n",
       "      <td>1200.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1137</th>\n",
       "      <td>80</td>\n",
       "      <td>2019</td>\n",
       "      <td>7.8</td>\n",
       "      <td>1200.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1138 rows × 37 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       时长    年份   评分       票房  类型_ 传记   类型_ 冒险   类型_ 剧情   类型_ 动作   类型_ 动画   \\\n",
       "0     122  2010  7.4  47200.0        0        0        0        0        0   \n",
       "1     117  2009  6.1  11299.0        0        0        0        0        0   \n",
       "2     128  2012  7.8  13700.0        0        0        0        0        0   \n",
       "3     112  2012  7.1   2137.0        0        0        0        0        0   \n",
       "4     101  2007  8.5     33.0        0        0        0        0        0   \n",
       "...   ...   ...  ...      ...      ...      ...      ...      ...      ...   \n",
       "1133   80  2019  7.8   1000.0        0        0        0        0        0   \n",
       "1134   80  2019  7.8   1200.0        0        0        0        0        0   \n",
       "1135   80  2019  7.8   1200.0        0        0        0        0        0   \n",
       "1136   80  2019  7.8   1200.0        0        0        0        0        0   \n",
       "1137   80  2019  7.8   1200.0        0        0        0        0        0   \n",
       "\n",
       "      类型_ 历史   ...  地区_中国大陆  地区_中国香港  地区_俄罗斯  地区_加拿大  地区_德国  地区_法国  地区_澳大利亚  \\\n",
       "0           0  ...        1        0       0       0      0      0        0   \n",
       "1           0  ...        0        1       0       0      0      0        0   \n",
       "2           0  ...        0        0       0       0      0      0        0   \n",
       "3           0  ...        1        0       0       0      0      0        0   \n",
       "4           0  ...        0        1       0       0      0      0        0   \n",
       "...       ...  ...      ...      ...     ...     ...    ...    ...      ...   \n",
       "1133        0  ...        1        0       0       0      0      0        0   \n",
       "1134        0  ...        1        0       0       0      0      0        0   \n",
       "1135        0  ...        1        0       0       0      0      0        0   \n",
       "1136        0  ...        1        0       0       0      0      0        0   \n",
       "1137        0  ...        1        0       0       0      0      0        0   \n",
       "\n",
       "      地区_美国  地区_英国  地区_韩国  \n",
       "0         0      0      0  \n",
       "1         0      0      0  \n",
       "2         0      0      0  \n",
       "3         0      0      0  \n",
       "4         0      0      0  \n",
       "...     ...    ...    ...  \n",
       "1133      0      0      0  \n",
       "1134      0      0      0  \n",
       "1135      0      0      0  \n",
       "1136      0      0      0  \n",
       "1137      0      0      0  \n",
       "\n",
       "[1138 rows x 37 columns]"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = extract_feature(myd)\n",
    "a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def prediction_boxoffice(df,filmsData): # df作为数据集传入，filmaData是想预测的信息\n",
    "    df.loc[len(df)] = filmsData # 加入最后最后一行，作为想预测的数据使用\n",
    "    datas = extract_feature(df)\n",
    "    x = datas.loc[:,datas.columns != '票房'].values # 选取训练、测试集\n",
    "    y = datas.loc[:,datas.columns == '票房'].values.reshape(-1,1)\n",
    "    x_train, y_train, x_test, y_test = x[:-1], y[:-1], x[-1], y[-1]\n",
    "    x_test = x_test.reshape(1,-1)\n",
    "    Model = model()\n",
    "    Model.fit(x_train,y_train)\n",
    "    y_pred = Model.predict(x_test)\n",
    "    hhh = Model.score(x_test,y_test)\n",
    "    print(y_test)\n",
    "    return y_pred,datas,hhh\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def predict_score(datas,model):\n",
    "    x = datas.loc[:,datas.columns != '票房'].values\n",
    "    y = datas.loc[:,datas.columns == '票房'].values.reshape(-1,1)\n",
    "    x_train, x_test, y_train, y_test = model_selection.train_test_split(x,y,test_size=0.3)\n",
    "    \n",
    "    Model = model()\n",
    "    Model.fit(x_train,y_train)\n",
    "    score = Model.score(x_test,y_test)\n",
    "    \n",
    "    return score"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 预测票房"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Users\\80423\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py:670: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  self._setitem_with_indexer(indexer, value)\n",
      "D:\\Users\\80423\\anaconda3\\lib\\site-packages\\sklearn\\ensemble\\forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
      "  \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n",
      "D:\\Users\\80423\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:9: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
      "  if __name__ == '__main__':\n",
      "D:\\Users\\80423\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\regression.py:543: UndefinedMetricWarning: R^2 score is not well-defined with less than two samples.\n",
      "  warnings.warn(msg, UndefinedMetricWarning)\n",
      "D:\\Users\\80423\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py:670: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  self._setitem_with_indexer(indexer, value)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nan\n",
      "预测票房 1377.2万元\n",
      "nan\n",
      "预测票房 [1200.]万元\n",
      "nan\n",
      "预测票房 [1439.12726855]万元\n",
      "nan\n",
      "预测票房 1562.3337665784638万元\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Users\\80423\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\regression.py:543: UndefinedMetricWarning: R^2 score is not well-defined with less than two samples.\n",
      "  warnings.warn(msg, UndefinedMetricWarning)\n",
      "D:\\Users\\80423\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py:670: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  self._setitem_with_indexer(indexer, value)\n",
      "D:\\Users\\80423\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\regression.py:543: UndefinedMetricWarning: R^2 score is not well-defined with less than two samples.\n",
      "  warnings.warn(msg, UndefinedMetricWarning)\n",
      "D:\\Users\\80423\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py:670: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  self._setitem_with_indexer(indexer, value)\n",
      "D:\\Users\\80423\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\regression.py:543: UndefinedMetricWarning: R^2 score is not well-defined with less than two samples.\n",
      "  warnings.warn(msg, UndefinedMetricWarning)\n"
     ]
    }
   ],
   "source": [
    "modellist = [RFR,LR,R,L]\n",
    "for m in modellist:\n",
    "    model = m\n",
    "    filmsData = ['喜剧',80,2019,7.8,'中国大陆',1200]\n",
    "    boxoffiice,datas,hhh = prediction_boxoffice(myd,filmsData)\n",
    "#     score = predict_score(datas,model)\n",
    "#     print(str(model)+'回归模型的准确率为'+str(score))\n",
    "    print(hhh)\n",
    "    print('预测票房' + ' ' + str(boxoffiice[0])+'万元')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
